/*******************************************************************************
*Project :          COVID-19 Research Project 

*Purpose : 		    Summary : Womply 

*Source : 			Womply Data from AWS 


*******************************************************************************/

clear all 
set more off 
capture log close 

* generate global date
global date=subinstr(c(current_date), " ", "_", .)
set obs 1 
gen date="${date}"
split date, parse("_")
cap replace date=date3+"_"+date2+"_"+date4 if date1==""
cap replace date=date2+"_"+date1+"_"+date3 if date1!=""
global date=date[1]
display as error "Today's date = ${date}"


* copy summary table to summary directory
cap noisily erase "tables/summary/summary_womply_total.xlsx"
cap noisily erase "tables/summary/summary_womply_business.xlsx"

copy "tables/templates/summary_template_womply_business.xlsx" "tables/summary/summary_womply_business.xlsx"
copy "tables/templates/summary_template_womply_total.xlsx" "tables/summary/summary_womply_total.xlsx"


*===============================================================================
* Get summary - Totals 
*===============================================================================

* load womply daily revenue data 
  use "data/clean/womply/womply_daily_revenue.dta" , clear 
  
* get population values - 99% of data merges 
  merge m:1 county_fips using "data/clean/usda_ers/county_information.dta" , keep(3) nogen ///
	keepusing(pop_estimate_2018) 

  encode(category) , gen(cat) 
  drop category 
  
  keep date county_fips revenue cat
  
* reshape data 
  rename revenue rev_
  reshape wide rev_  , i(date county_fips) j(cat) 
  
* subset data to March 2020 
  keep if inrange(date, td(01mar2020) , td(07mar2020))
  

* replace missing values , generate per capita
  foreach var of varlist rev_* { 
	replace `var' = 0 if missing(`var')
	}
	
* get overall value 
  egen rev_22 = rowtotal(rev_*)
  
* reshape again 
  reshape long  rev_ , i(date county_fips) j(cat)
  
* subset data to March 2020 
  keep if inrange(date, td(01mar2020) , td(07mar2020))
 
* collapse data 
  ren rev_ rev 
  collapse (mean) avg_rev = rev ///
           (median) med_rev = rev ///
		   (p10)  p10_rev = rev ///
		   (p90)  p90_rev = rev ///
		   (sd)  sd_rev  = rev , /// 
		    by(cat) 
			
  decode(cat) , gen(category)
  replace category = "overall" if cat == 22 
   
* get overall category variable 
  gen clean_cat="Other" if category!="overall"
  replace clean_cat= "Restaurant" if inlist(category,"bars and lounges","food and beverage shops","quick serve food and beverage businesses","restaurants")
  tab category clean_cat, m

* format category 
 replace category = proper(category)
 replace category = subinstr(category, "And" , "and" ,.) 
  
* sort and export 
  sort category , stable 
  replace clean_cat = "x" if category =="Overall"
  sort clean_cat , stable 
  replace clean_cat = "" if category=="Overall" 

* keep certain variables 
  drop cat 
  order category 

* export file 
export excel using "tables/summary/summary_womply_total.xlsx" , cell(A4) sheetmodify ///
	   keepcellfmt 
	   


*===============================================================================
* Get summary - per merchant
*===============================================================================

* load womply daily revenue data 
  use "data/clean/womply/womply_daily_revenue.dta" , clear 
  
* get population values - 99% of data merges 
  merge m:1 county_fips using "data/clean/usda_ers/county_information.dta" , keep(3) nogen ///
	keepusing(pop_estimate_2018) 
	
* get average no. of merchants 
 preserve 
  keep if inrange(date, td(01mar2020) , td(07mar2020))
  collapse merchants ,  by(county_fips category)
  ren merchants avg_merchants 
  tempfile merchants 
  save `merchants' , replace 
 restore 
 
 merge m:1 county_fips category using `merchants' , assert(1 3) nogen 

  encode(category) , gen(cat) 
  drop category pre_covid_merchant_size intra_covid_active_merchants ///
  intra_covid_inactive_merchants share_firms_no_rev 
  
* reshape data 
  drop merchants 
  rename revenue rev_
  rename avg_merchants merch_ 
  reshape wide rev_  merch_ , i(date county_fips) j(cat) 
  
* replace missing values , generate per merchant measure 
  foreach var of varlist rev_* merch_* { 
	replace `var' = 0 if missing(`var')
	}
	
		
  forv i = 1/21 { 
	g pm_rev_`i' = rev_`i' /merch_`i' 
	} 
	
	
* get overall value 
  egen rev_22 = rowtotal(rev_*)
  egen merch_22 = rowtotal(merch_*)
  gen pm_rev_22 = rev_22/merch_22 
  
* reshape again 
  reshape long pm_rev_ rev_ merch_ , i(date county_fips) j(cat)
  
* collapse data 
  ren pm_rev_ pm_rev
  
* subset data to March 2020 
  keep if inrange(date, td(01mar2020) , td(07mar2020))
 
* collapse data 
  collapse (mean) avg_rev = pm_rev ///
           (median) med_rev = pm_rev ///
		   (p10)  p10_rev = pm_rev ///
		   (p90)  p90_rev = pm_rev ///
		   (sd)  sd_rev  = pm_rev [weight = merch_]  , /// 
		    by(cat) 
			

  decode(cat) , gen(category)
  replace category = "overall" if cat == 22 
   
* get overall category variable 
  gen clean_cat="Other" if category!="overall"
  replace clean_cat= "Restaurant" if inlist(category,"bars and lounges","food and beverage shops","quick serve food and beverage businesses","restaurants")
  tab category clean_cat, m


* format category 
 replace category = proper(category)
 replace category = subinstr(category, "And" , "and" ,.) 
  
* sort and export 
  sort category , stable 
  replace clean_cat = "x" if category =="Overall"
  sort clean_cat , stable 
  replace clean_cat = "" if category=="Overall" 

* keep certain variables 
  drop cat 
  order category 

* export file 
export excel using "tables/summary/summary_womply_business.xlsx" , cell(A4) sheetmodify ///
	   keepcellfmt 
